* Start from an empty session and move to the MTurk working folder.
clear all
cd "~/Dropbox/Wikipedia/MTURK"

* Read the raw Mechanical Turk results export.
insheet using kalla_aronow_mturk_results.csv, clear

* Remove the HIT / assignment bookkeeping columns, which the analysis
* visible in this script does not reference.
drop hitid hittypeid title description keywords reward creationtime ///
	maxassignments requesterannotation assignmentdurationinseconds ///
	autoapprovaldelayinseconds expiration numberofsimilarhits ///
	lifetimeinseconds assignmentid assignmentstatus accepttime ///
	autoapprovaltime approvaltime rejectiontime requesterfeedback ///
	worktimeinseconds lifetimeapprovalrate last30daysapprovalrate ///
	last7daysapprovalrate approve reject

* Numeric round index (1-5) built from the text label in inputround.
* Labels not listed here (for example "test") leave round missing.
gen round = .
local k = 0
foreach lbl in july august retired-dead nov-pre nov-post {
	local ++k
	replace round = `k' if inputround == "`lbl'"
}

* name is a copy of inputmc; together with round it is the merge key.
gen name = inputmc
* Disabled: gen senator=inputmc if inputround=="retired-dead"

* Bring in treat_final from the merged experiment file, matching many
* MTurk rows to one (round, name) record, then inspect the match by round.
* NOTE(review): no keep() option is given, so rows found only in the
* using file (_merge==2) stay in memory -- confirm this is intended.
merge m:1 round name using "~/Dropbox/Wikipedia/final-data/final-merged.dta", keepusing(treat_final)
tabulate inputround _merge, missing

* positive: 1 when treat_final is 1 or 2, 0 when it is 3 or 4,
* missing for any other value (including unmatched rows).
gen positive = inlist(treat_final, 1, 2) if inlist(treat_final, 1, 2, 3, 4)

* Value labels for the three 7-point answer items, listed from 1 to 7.

* Q1: how positive or negative.
label define q1 1 "Very Negative" 2 "Negative" 3 "Somewhat Negative" ///
	4 "Neither" 5 "Somewhat Positive" 6 "Positive" 7 "Very Positive"
label values answerq1answer q1

* Q2: how relevant or irrelevant.
label define q2 1 "Very Irrelevant" 2 "Irrelevant" 3 "Somewhat Irrelevant" ///
	4 "Neither" 5 "Somewhat Relevant" 6 "Relevant" 7 "Very Relevant"
label values answerq2answer q2

* Q3: how favorable or unfavorable.
label define q3 1 "Very Unfavorable" 2 "Unfavorable" 3 "Somewhat Unfavorable" ///
	4 "Neither" 5 "Somewhat Favorable" 6 "Favorable" 7 "Very Favorable"
label values answerq3answer q3

* correct = 1 when a quality-control fact was answered exactly as its
* embedded instruction asks, 0 otherwise.
* In the expected codes below, the "first" option corresponds to 7,
* "second" to 6, "third" to 5 and "last" to 1.
gen correct = 0

* first, third, first -> 7 5 7
replace correct = 1 if answerq1answer == 7 & answerq2answer == 5 & answerq3answer == 7 ///
	& inputfact == "Christopher Clark is not who we are discussing. If you are reading this fact, please select the first, third, and then first options for the three questions."

* third, second, last -> 5 6 1
replace correct = 1 if answerq1answer == 5 & answerq2answer == 6 & answerq3answer == 1 ///
	& inputfact == "Christopher Clark is not who we are discussing. If you are reading this fact, please select the third, second, and then last options for the three questions."

* first, last, second -> 7 1 6
replace correct = 1 if answerq1answer == 7 & answerq2answer == 1 & answerq3answer == 6 ///
	& inputfact == "James Madison is not the subject of this fact. If you are reading this fact, please select the first, last and then second options for the three questions."

* third for all three -> 5 5 5
replace correct = 1 if answerq1answer == 5 & answerq2answer == 5 & answerq3answer == 5 ///
	& inputfact == "James Madison is not the subject of this fact. If you are reading this fact, please select the third option for the three questions."

* second, last, second -> 6 1 6
replace correct = 1 if answerq1answer == 6 & answerq2answer == 1 & answerq3answer == 6 ///
	& inputfact == "Julie Green was an award-winning politician. If you are reading this fact, please select the second, last, and then second options for the three questions."

* last, first, second -> 1 7 6
replace correct = 1 if answerq1answer == 1 & answerq2answer == 7 & answerq3answer == 6 ///
	& inputfact == "Julie Lee was an award-winning politician. If you are reading this fact, please select the last, first and then second options for the three questions."

* Do Turkers pay attention? Tabulate correct over the quality-control rows.
tabulate correct if inputround == "test"
* Reported result: the quality control facts were coded 245 times
* (49 facts, 5 times each). Of these, 237 (96.7%) were coded according to
* the directions, suggesting that the Mechanical Turk workers were paying
* attention and carefully reading the facts.

* The quality-control rows are not needed beyond this point.
drop if inputround == "test"

* Number each worker's submissions in submittime order and flag the first.
* The within-worker sort key is passed to bysort itself, so the numbering
* does not depend on an earlier, separate sort still being in effect.
* NOTE(review): submittime is used exactly as read from the CSV; if it is a
* text timestamp the order is alphabetical, not chronological -- confirm,
* and convert to a Stata datetime first if needed.
bysort workerid (submittime): gen submitorder = _n
gen view_first = submitorder == 1

* Integer ids for each distinct fact text and for each worker.
egen factid = group(inputfact)
egen w_id = group(workerid)

* Box plots of the three answer items, split by positive (0 is relabelled
* "Negative", 1 "Positive"). Each graph gets its own name() so a later
* graph does not replace an earlier one in memory, and the variables are
* written out in full so the commands also run under "set varabbrev off".

//S2 Fig. Distribution of Mechanical Turk responses on how positive or negative the facts used were perceived to be.
graph box answerq1answer, over(positive, relabel(1 "Negative" 2 "Positive")) ///
	ylabel(1 "Very Neg" 2 "Neg" 3 "Somewhat Neg" 4 "Neither" 5 "Somewhat Pos" 6 "Pos" 7 "Very Pos", angle(65)) ///
	ytitle("") name(s2_positivity, replace)

//S3 Fig. Distribution of Mechanical Turk responses on how relevant or irrelevant the facts used were perceived to be.
graph box answerq2answer, over(positive, relabel(1 "Negative" 2 "Positive")) ///
	ylabel(1 "Very Irr" 2 "Irr" 3 "Somewhat Irr" 4 "Neither" 5 "Somewhat Rel" 6 "Rel" 7 "Very Rel", angle(65)) ///
	ytitle("") name(s3_relevance, replace)

//S4 Fig. Distribution of Mechanical Turk responses on how favorable or unfavorable the senator seemed after reading the fact.
graph box answerq3answer, over(positive, relabel(1 "Negative" 2 "Positive")) ///
	ylabel(1 "Very Unfav" 2 "Unfav" 3 "Somewhat Unfav" 4 "Neither" 5 "Somewhat Fav" 6 "Fav" 7 "Very Fav", angle(65)) ///
	ytitle("") name(s4_favorability, replace)

//S19 Table. Regression Analysis of Mechanical Turk Coding
* Each answer item is regressed on positive with standard errors clustered
* by worker (w_id). Higher values mean more positive (Q1), more relevant
* (Q2) and more favorable (Q3).
local outcomes answerq1answer answerq2answer answerq3answer

* All responses.
foreach y of local outcomes {
	regress `y' positive, vce(cluster w_id)
}

* Each worker's first response only (in case there are demand effects).
foreach y of local outcomes {
	regress `y' positive if view_first == 1, vce(cluster w_id)
}


* Interrater agreement -- Fleiss's kappa across all seven categories.
* Reshape to one row per fact with one column per (question, coder).
keep answerq1answer answerq2answer answerq3answer round positive factid
* Rows without a fact text have a missing factid and are not rated items.
drop if missing(factid)
bysort factid: egen coder = seq()
reshape wide answerq*, i(factid) j(coder)

* Fill ratings that are missing after the reshape with 4 ("Neither").
* The wildcard covers however many coder columns the reshape produced,
* instead of assuming exactly five coders per fact.
* NOTE(review): imputing the midpoint feeds into the kappa estimates --
* confirm this is the intended treatment of missing ratings.
foreach var of varlist answerq* {
	replace `var' = 4 if missing(`var')
}
kap answerq1*
kap answerq2*
kap answerq3*

* Fleiss's kappa collapsed to three categories:
* 1-3 become 1, 4 stays 4, 5-7 become 7.
foreach var of varlist answerq* {
	recode `var' (2 3 = 1) (5 6 = 7)
}
kap answerq1*
kap answerq2*
kap answerq3*

